--! This file is part of the FELIX firmware distribution (https://gitlab.cern.ch/atlas-tdaq-felix/firmware/).
--! Copyright (C) 2001-2021 CERN for the benefit of the ATLAS collaboration.
--! Authors:
--!               Frans Schreuder
--!
--!   Licensed under the Apache License, Version 2.0 (the "License");
--!   you may not use this file except in compliance with the License.
--!   You may obtain a copy of the License at
--!
--!       http://www.apache.org/licenses/LICENSE-2.0
--!
--!   Unless required by applicable law or agreed to in writing, software
--!   distributed under the License is distributed on an "AS IS" BASIS,
--!   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--!   See the License for the specific language governing permissions and
--!   limitations under the License.

--!------------------------------------------------------------------------------
--!
--!           NIKHEF - National Institute for Subatomic Physics
--!
--!                       Electronics Department
--!
--!-----------------------------------------------------------------------------
--! @class pcie_ep_sim_model
--!
--!
--! @author      Andrea Borga    (andrea.borga@nikhef.nl)<br>
--!              Frans Schreuder (frans.schreuder@nikhef.nl)
--!
--!
--! @date        07/01/2015    created
--!
--! @version     1.1
--!
--! @brief
--! Wrapper unit for the PCI Express core simulation model
--!
--! Notes:
--! Dec 08 2020 F. Schreuder <f.schreuder@nikhef.nl>
--!          Initial commit
--!
--!
--! @detail
--!
--!-----------------------------------------------------------------------------
--! ------------------------------------------------------------------------------
--! Wupper: PCIe Gen3 and Gen4 DMA Core for Xilinx FPGAs
--!
--! Copyright (C) 2021 Nikhef, Amsterdam (f.schreuder@nikhef.nl)
--!
--! Licensed under the Apache License, Version 2.0 (the "License");
--! you may not use this file except in compliance with the License.
--! You may obtain a copy of the License at
--!
--!         http://www.apache.org/licenses/LICENSE-2.0
--!
--! Unless required by applicable law or agreed to in writing, software
--! distributed under the License is distributed on an "AS IS" BASIS,
--! WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--! See the License for the specific language governing permissions and
--! limitations under the License.
--
--! @brief ieee


library uvvm_util;
    context uvvm_util.uvvm_util_context;
library ieee, UNISIM;
    use ieee.numeric_std.all;
    use UNISIM.VCOMPONENTS.all;
    use ieee.std_logic_1164.all;
    use ieee.numeric_std.all;
    use ieee.numeric_std_unsigned.all;
    use work.pcie_package.all;
    use std.env.all;

--! Behavioural replacement for the PCIe endpoint block, for simulation only.
--! The model generates host-side register reads/writes on the completer-request
--! (CQ) stream, answers the configuration-management port with fixed BAR
--! addresses and ties the flow-control and MSI-X status outputs to constants.
entity pcie_ep_sim_model is
    generic(
        -- Width of the AXI-stream data buses in bits. 256, 512 and 1024 are
        -- handled; any other value makes the register access procedures raise an error.
        DATA_WIDTH: integer := 256;
        -- Number of DMA descriptors programmed by the model. The last one is
        -- set up as the FromHost descriptor, all others as ToHost descriptors.
        NUMBER_OF_DESCRIPTORS: integer := 5
    );
    port (
        -- Flow-control credit outputs; the model drives all of them to zero.
        cfg_fc_cpld                : out    std_logic_vector(11 downto 0);
        cfg_fc_cplh                : out    std_logic_vector(7 downto 0);
        cfg_fc_npd                 : out    std_logic_vector(11 downto 0);
        cfg_fc_nph                 : out    std_logic_vector(7 downto 0);
        cfg_fc_pd                  : out    std_logic_vector(11 downto 0);
        cfg_fc_ph                  : out    std_logic_vector(7 downto 0);
        --cfg_fc_sel                 : in     std_logic_vector(2 downto 0);
        --cfg_interrupt_msix_address : in     std_logic_vector(63 downto 0);
        --cfg_interrupt_msix_data    : in     std_logic_vector(31 downto 0);
        -- MSI-X status: enable is all ones, fail is '0', sent mirrors the int request.
        cfg_interrupt_msix_enable  : out    std_logic_vector(3 downto 0);
        cfg_interrupt_msix_fail    : out    std_logic;
        cfg_interrupt_msix_int     : in     std_logic;
        cfg_interrupt_msix_sent    : out    std_logic;
        -- Configuration management port: reads of address 4, 5 and 6 return
        -- the BAR0, BAR1 and BAR2 constants; done follows read/write by one clock.
        cfg_mgmt_addr              : in     std_logic_vector(18 downto 0);
        --cfg_mgmt_byte_enable       : in     std_logic_vector(3 downto 0);
        cfg_mgmt_read              : in     std_logic;
        cfg_mgmt_read_data         : out    std_logic_vector(31 downto 0);
        cfg_mgmt_read_write_done   : out    std_logic;
        cfg_mgmt_write             : in     std_logic;
        --cfg_mgmt_write_data        : in     std_logic_vector(31 downto 0);
        -- Clock output of the model.
        clk                        : out    std_logic;
        -- Completer-request stream carrying the register accesses generated by
        -- the reg_write process; m_axis_r_cq.tready is the back-pressure input.
        m_axis_cq                  : out    axis_type;
        m_axis_r_cq                : in     axis_r_type;
        -- NOTE(review): presumably the requester-completion stream used for DMA
        -- read completions; its handling is outside the reviewed part of the file.
        m_axis_r_rc                : in     axis_r_type;
        m_axis_rc                  : out    axis_type;
        -- Reset output of the model (also used as asynchronous reset of mgmt_proc).
        reset                      : out    std_logic;
        -- Completer-completion stream; register read data is taken from
        -- s_axis_cc.tdata(159 downto 96). s_axis_r_cc.tready is tied to '1'.
        s_axis_cc                  : in     axis_type;
        s_axis_r_cc                : out    axis_r_type;
        -- NOTE(review): presumably the requester-request stream carrying DMA
        -- requests from the design; its handling is outside the reviewed part.
        s_axis_r_rq                : out    axis_r_type;
        s_axis_rq                  : in     axis_type;
        sys_rst_n                  : in     std_logic;
        -- Link-up indication, defaults to '1'.
        user_lnk_up                : out    std_logic := '1');
end entity pcie_ep_sim_model;

architecture sim of pcie_ep_sim_model is

    -- Size of every ToHost descriptor buffer (added to the start address to form the end address).
    constant DESCRIPTOR_SIZE: std_logic_vector(47 downto 0) := x"0000_0001_0000";
    -- Size of the FromHost descriptor buffer: 4096 bytes per 256 bits of DATA_WIDTH.
    constant FROMHOST_DESCRIPTOR_SIZE: std_logic_vector(47 downto 0) := std_logic_vector(to_unsigned(4096*(DATA_WIDTH/256),48));

    -- Period of the generated clock (250 MHz).
    constant user_clk_period: time  := 4 ns;

    -- BAR base addresses returned on the configuration management port and
    -- used to select the BAR ID of generated register accesses.
    constant BAR0: std_logic_vector(31 downto 0) := x"BA00_0000";
    constant BAR1: std_logic_vector(31 downto 0) := x"BA10_0000";
    constant BAR2: std_logic_vector(31 downto 0) := x"BA20_0000";
    -- Memory of DATA_WIDTH-wide words, and an array of 128-word memories.
    type slvD_array is array (natural range <>) of std_logic_vector(DATA_WIDTH-1 downto 0);
    type slv2D_array is array (natural range <>) of slvD_array(0 to 127);

    -- One 128-word memory per ToHost descriptor (four memories).
    signal ToHostMem: slv2D_array(0 to 3);
    -- FromHost memory image; filled with a fixed pattern by g_FromHostMem.
    signal FromHostMem: slvD_array(0 to 127);
    -- Wrap counter embedded in the low byte of every 64-bit lane of FromHostMem.
    signal FromHostWrapCount : integer:= 1;
    signal ToHostWrapCount : integer:= 0;
    signal ToHostMemorySelect_s : integer range 0 to NUMBER_OF_DESCRIPTORS-2;
    signal DoCompare: std_logic;
    signal ClearToHostMem: std_logic; --Signal to set the ToHost Memory to all 'U', so it's more visible where a new round starts.
    -- Clock used by mgmt_proc and reg_write.
    signal user_clk: std_logic;
    -- Free-running clock produced by user_clk_proc.
    signal sysclk_gen: std_logic;
    -- Debug marker: '1' while procedure r waits for s_axis_cc.tvalid.
    signal wait_for_cc_tvalid: std_logic; -- @suppress "signal wait_for_cc_tvalid is never read"
    -- When not '0', reg_write stops wrapping the ToHost descriptors and leaves only the FromHost descriptor enabled.
    signal switch_off_tohost: std_logic;
    -- Set to true by reg_write once the FromHost pointer has wrapped 40 times.
    signal do_finish_fromhost: boolean := false;
    type IntArray_type is array (natural range <>) of integer;
    type slv64_arr is array(natural range <>) of std_logic_vector(63 downto 0);
    type slv11_arr is array(natural range <>) of std_logic_vector(10 downto 0);
    -- TLP size in bytes per ToHost descriptor; reg_write programs TLP_SIZE/4 into the descriptor.
    constant TLP_SIZE_TOHOST : IntArray_type(0 to 3) := (512, 256, 64*(DATA_WIDTH/256), 32*(DATA_WIDTH/256));
    -- TLP size in bytes of the FromHost descriptor.
    constant TLP_SIZE_FROMHOST : integer := 256;

    signal RcStraddlePosition_s: integer range 0 to DATA_WIDTH/128-1;
    signal RqStraddlePosition_s: integer range 0 to DATA_WIDTH/256-1;
    --signal responding : std_logic;
    --signal responding : std_logic;

    -- Width-independent view of the requester-completion (RC) tuser sideband.
    -- Every field is sized for the widest supported bus; the trailing comments
    -- give the bit positions of the field in the 256-bit and 512-bit tuser layouts.
    -- NOTE(review): no use of this record type is visible in the reviewed part of the file.
    type axis_rc_tuser_bits_type is record
        byte_en: std_logic_vector(127 downto 0); --31:0 for 256b, 63:0 for 512b, 127:0 for 1024b
        is_sop:  std_logic_vector(7 downto 0);  --33:32 for 256b, 67:64 for 512b,
        is_sop0_ptr: std_logic_vector(2 downto 0); --69:68 for 512b
        is_sop1_ptr: std_logic_vector(2 downto 0); --71:70 for 512b
        is_sop2_ptr: std_logic_vector(2 downto 0); --73:72 for 512b
        is_sop3_ptr: std_logic_vector(2 downto 0); --75:74 for 512b
        is_sop4_ptr: std_logic_vector(2 downto 0); --Only for 1024 bit / gen5
        is_sop5_ptr: std_logic_vector(2 downto 0); --Only for 1024 bit / gen5
        is_sop6_ptr: std_logic_vector(2 downto 0); --Only for 1024 bit / gen5
        is_sop7_ptr: std_logic_vector(2 downto 0); --Only for 1024 bit / gen5
        is_eop: std_logic_vector(7 downto 0); --37:34 for 256b, 79:76 for 512b.
        is_eop0_ptr: std_logic_vector(4 downto 0); --83:80 for 512b, part of is_eop0[3:1] for 256b
        is_eop1_ptr: std_logic_vector(4 downto 0); --87:84 for 512b, part of is_eop1[3:1] for 256b
        is_eop2_ptr: std_logic_vector(4 downto 0); --91:88 for 512b, NA for 256b
        is_eop3_ptr: std_logic_vector(4 downto 0); --95:92 for 512b, NA for 256b
        is_eop4_ptr: std_logic_vector(4 downto 0); --Only for 1024 bit / gen5
        is_eop5_ptr: std_logic_vector(4 downto 0); --Only for 1024 bit / gen5
        is_eop6_ptr: std_logic_vector(4 downto 0); --Only for 1024 bit / gen5
        is_eop7_ptr: std_logic_vector(4 downto 0); --Only for 1024 bit / gen5
        discontinue: std_logic; --42 for 256, 96 for 512, unused set to '0'
        parity: std_logic_vector(127 downto 0); --74:43 for 256b, 160:97 for 512b, set to x"0000_0000_0000_0000"
    end record;
    -- Width-independent view of the requester-request (RQ) tuser sideband.
    -- Every field is sized for the widest supported bus; the trailing comments
    -- give the bit positions of the field in the 256-bit and 512-bit tuser layouts.
    type axis_rq_tuser_bits_type is record
        first_be: std_logic_vector(15 downto 0); --3:0 for 256b, 7:0 for 512b, 15:0 for 1024b
        last_be:  std_logic_vector(15 downto 0); --7:4 for 256b, 15:8 for 512b, 31:16 for 1024b
        addr_offset: std_logic_vector(15 downto 0); --10:8 for 256, 19:16 for 512b, unused, set to "0000"
        is_sop: std_logic_vector(3 downto 0); --21:20 for 512b
        is_sop0_ptr: std_logic_vector(1 downto 0); --23:22 for 512b
        is_sop1_ptr: std_logic_vector(1 downto 0); --25:24 for 512b
        is_sop2_ptr: std_logic_vector(1 downto 0); --Only for 1024b
        is_sop3_ptr: std_logic_vector(1 downto 0); --Only for 1024b
        is_eop: std_logic_vector(3 downto 0); --27:26 for 512b
        is_eop0_ptr: std_logic_vector(4 downto 0); --31:28 for 512b
        is_eop1_ptr: std_logic_vector(4 downto 0); --35:32 for 512b
        is_eop2_ptr: std_logic_vector(4 downto 0); --Only for 1024b
        is_eop3_ptr: std_logic_vector(4 downto 0); --Only for 1024b
        discontinue: std_logic; --11 for 256, 36 for 512, unused set to '0'
        tph_present: std_logic_vector(1 downto 0); --12 for 256b, 38:37 for 512b, set to "00"
        tph_type: std_logic_vector(3 downto 0);  --14:13 for 256, 42:39 for 512b, set to "0000"
        tph_indirect_tag_en: std_logic_vector(1 downto 0); --15 for 256b, 44:43 for 512b, set to "00"
        tph_st_tag: std_logic_vector(15 downto 0); --23:16 for 256b, 60:45 for 512b, set to x"0000"
        seq_num0: std_logic_vector(5 downto 0); --61:60,27:24 for 256b, 66:61 for 512b, set to "000000"
        seq_num1: std_logic_vector(5 downto 0); --72:67 for 512b, set to "000000"
        seq_num2: std_logic_vector(5 downto 0); --Only for 1024b
        seq_num3: std_logic_vector(5 downto 0); --Only for 1024b
        parity: std_logic_vector(127 downto 0); --59:28 for 256b, 136:73 for 512b, set to x"0000_0000_0000_0000"
    end record;

    -- Decoded RQ sideband of the incoming s_axis_rq stream (filled in by the g_tuserbits_* generates).
    signal axis_rq_tuser_bits: axis_rq_tuser_bits_type;


begin
    --! Free-running clock generator: sysclk_gen starts high and toggles every
    --! half user_clk_period.
    user_clk_proc: process
        constant HALF_PERIOD: time := user_clk_period / 2;
    begin
        -- high phase
        sysclk_gen <= '1';
        wait for HALF_PERIOD;
        -- low phase
        sysclk_gen <= '0';
        wait for HALF_PERIOD;
    end process user_clk_proc;

    -- Flow-control credit outputs are not modelled: all of them report zero.
    cfg_fc_cpld                <= (others => '0');
    cfg_fc_cplh                <= (others => '0');
    cfg_fc_npd                 <= (others => '0');
    cfg_fc_nph                 <= (others => '0');
    cfg_fc_pd                  <= (others => '0');
    cfg_fc_ph                  <= (others => '0');
    --cfg_fc_sel                 : in     std_logic_vector(2 downto 0);
    --cfg_interrupt_msix_address : in     std_logic_vector(63 downto 0);
    --cfg_interrupt_msix_data    : in     std_logic_vector(31 downto 0);
    -- MSI-X: always enabled, never failing, and every interrupt request is
    -- reported as sent combinatorially.
    cfg_interrupt_msix_enable  <= (others => '1');
    cfg_interrupt_msix_fail    <= '0';
    cfg_interrupt_msix_sent    <= cfg_interrupt_msix_int;

    --! Minimal model of the configuration management port.
    --! cfg_mgmt_read_write_done follows (read or write) with one clock of
    --! latency. A read of address 4, 5 or 6 returns the BAR0, BAR1 or BAR2
    --! constant; a read of any other address leaves cfg_mgmt_read_data unchanged.
    --! Both outputs are cleared by the asynchronous reset, so the done flag
    --! is not left undefined or stale while reset is asserted.
    mgmt_proc: process(user_clk, reset)
    begin
        if reset = '1' then
            cfg_mgmt_read_data       <= (others => '0');
            cfg_mgmt_read_write_done <= '0';
        elsif rising_edge(user_clk) then
            cfg_mgmt_read_write_done <= cfg_mgmt_read or cfg_mgmt_write;
            if cfg_mgmt_read = '1' then
                if cfg_mgmt_addr = "000"&x"0004" then    --read BAR0
                    cfg_mgmt_read_data <= BAR0;
                elsif cfg_mgmt_addr = "000"&x"0005" then --read BAR1
                    cfg_mgmt_read_data <= BAR1;
                elsif cfg_mgmt_addr = "000"&x"0006" then --read BAR2
                    cfg_mgmt_read_data <= BAR2;
                end if;
            end if;
        end if;
    end process mgmt_proc;

    --! Host-side stimulus process. After a start-up delay it programs the DMA
    --! descriptors through register writes on the CQ stream and then loops
    --! forever, polling the descriptor status registers and moving the
    --! ToHost / FromHost PC pointers.
    reg_write: process
        -- Scratch value for register read data and for building enable masks.
        variable RegData: std_logic_vector(63 downto 0);
        -- Low 12 bits of the current and the previously written FromHost PC pointer.
        variable pc_pointer_FromHost, last_pc_pointer_FromHost: std_logic_vector(11 downto 0);
        -- Upper bits of the FromHost buffer address; concatenated with the 12-bit pointer.
        constant pc_pointer_FromHost_msb: std_logic_vector(63 downto 12):= x"DEF8_0000_0000_0";
        -- '1' once the ToHost descriptors have been disabled, so the disable write is issued only once.
        variable tohost_switched_off : std_logic;
        -- Number of times the FromHost PC pointer has wrapped back to zero.
        variable fromhost_wraps: integer;
        -- Enable mask with one bit set per descriptor (bits 0 to NUMBER_OF_DESCRIPTORS-1).
        variable all_descriptors_enable: std_logic_vector(7 downto 0);
        --! Issue one 64-bit memory write request on the completer-request stream.
        --! The request beat is driven on m_axis_cq after a rising clock edge and
        --! held until a later rising edge on which m_axis_r_cq.tready is not '0';
        --! tvalid and tlast are then released.
        --! @param RegAddr register offset; bits 19:2 are placed in the address field
        --! @param BarAddr BAR base address; bits 31:20 form the upper address bits and
        --!                the value selects the BAR ID (unknown values select BAR 0)
        --! @param Data    64-bit payload, placed in tdata(191 downto 128)
        procedure w(RegAddr: in std_logic_vector(19 downto 0);
            BarAddr: in std_logic_vector(31 downto 0);
            Data:    in std_logic_vector(63 downto 0)) is
            variable target_bar: std_logic_vector(2 downto 0):= "000";
        begin
            -- Resolve the BAR ID up front; the initial value covers unknown base addresses.
            if BarAddr = BAR0 then
                target_bar := "000";
            elsif BarAddr = BAR1 then
                target_bar := "001";
            elsif BarAddr = BAR2 then
                target_bar := "010";
            end if;

            wait until rising_edge(user_clk);

            -- Start from an all-zero beat. Address type, address bits 63:32,
            -- requester ID, tag, target function, BAR aperture, transaction
            -- class, attributes and the payload above bit 191 all stay zero.
            m_axis_cq.tdata                  <= (others => '0');
            m_axis_cq.tdata(19 downto 2)     <= RegAddr(19 downto 2);
            m_axis_cq.tdata(31 downto 20)    <= BarAddr(31 downto 20);
            m_axis_cq.tdata(74 downto 64)    <= "00000000010"; -- dword count: 2 words, 64 bit write
            m_axis_cq.tdata(78 downto 75)    <= "0001";        -- request type: memory write
            m_axis_cq.tdata(114 downto 112)  <= target_bar;    -- BAR ID
            m_axis_cq.tdata(191 downto 128)  <= Data;          -- register write data

            -- Byte enables: the position inside tuser depends on the bus width.
            m_axis_cq.tuser <= (others => '0');
            case DATA_WIDTH is
                when 1024 =>
                    m_axis_cq.tuser(131 downto 128) <= "1111"; -- first_be
                    m_axis_cq.tuser(147 downto 144) <= "1111"; -- last_be
                when 512 =>
                    m_axis_cq.tuser(3 downto 0)     <= "1111"; -- first_be
                    m_axis_cq.tuser(11 downto 8)    <= "1111"; -- last_be
                when 256 =>
                    m_axis_cq.tuser(3 downto 0)     <= "1111"; -- first_be
                    m_axis_cq.tuser(7 downto 4)     <= "1111"; -- last_be
                when others =>
                    error("Unsupported DATA_WIDTH " & to_string(DATA_WIDTH) & " bits");
            end case;

            -- Four descriptor dwords plus two payload dwords are valid.
            m_axis_cq.tkeep(m_axis_cq.tkeep'high downto 0) <= (others => '0');
            m_axis_cq.tkeep(7 downto 0)                    <= "00111111";
            m_axis_cq.tvalid <= '1';
            m_axis_cq.tlast  <= '1';

            -- Hold the beat for at least one clock, then until tready is seen.
            loop
                wait until rising_edge(user_clk);
                exit when m_axis_r_cq.tready /= '0';
            end loop;
            m_axis_cq.tvalid <= '0';
            m_axis_cq.tlast  <= '0';

        end w;
        --! Issue one 64-bit memory read request on the completer-request stream
        --! and return the data from the completion.
        --! The procedure waits for m_axis_r_cq.tready, drives the request, polls
        --! s_axis_cc.tvalid every half clock period, waits for tready again and
        --! then samples s_axis_cc.tdata(159 downto 96). Each wait gives up
        --! after the timeout counter reaches 100: an error is reported and the
        --! simulation is stopped.
        --! @param RegAddr register offset; bits 19:2 are placed in the address field
        --! @param BarAddr BAR base address; bits 31:20 form the upper address bits and
        --!                the value selects the BAR ID (unknown values select BAR 0)
        --! @param DataOut 64-bit read data taken from the completion stream
        procedure r(RegAddr: in std_logic_vector(19 downto 0);
            BarAddr: in std_logic_vector(31 downto 0);
            DataOut: out std_logic_vector(63 downto 0)
        ) is
            variable bar_id: std_logic_vector(2 downto 0):= "000";
            -- Must start at 0: without an initial value an integer variable starts at
            -- integer'left on every call and the first timeout below could never fire.
            variable timeout: integer := 0;
        begin
            wait until rising_edge(user_clk);
            while m_axis_r_cq.tready = '0' loop
                wait until rising_edge(user_clk);
                timeout := timeout + 1;
                if timeout = 100 then
                    report "Timeout waiting for tready" severity error;
                    std.env.stop;
                    exit;
                end if;
            end loop;
            --wait until rising_edge(user_clk);
            m_axis_cq.tdata <= (others => '0');

            m_axis_cq.tdata(1 downto 0) <= "00"; --address type
            m_axis_cq.tdata(63 downto 32) <= x"0000_0000";
            m_axis_cq.tdata(31 downto 20) <= BarAddr(31 downto 20);
            m_axis_cq.tdata(19 downto 2) <= RegAddr(19 downto 2);

            m_axis_cq.tdata(74 downto 64) <= "00000000010";-- 2 words, 64 bit read dword_count_s;
            m_axis_cq.tdata(78 downto 75) <= "0000"; --Memory read, request_type_v
            m_axis_cq.tdata(95 downto 80)    <= x"0000";--requester_id_s
            m_axis_cq.tdata(103 downto 96)   <= x"00";--tag_s
            m_axis_cq.tdata(111 downto 104)  <= x"00";--target_function_s
            if BarAddr = BAR0 then
                bar_id := "000";
            end if;
            if BarAddr = BAR1 then
                bar_id := "001";
            end if;
            if BarAddr = BAR2 then
                bar_id := "010";
            end if;
            m_axis_cq.tdata(114 downto 112)  <= bar_id;
            m_axis_cq.tdata(120 downto 115)  <= "000000";--bar_aperture_s
            m_axis_cq.tdata(123 downto 121)  <= "000";--transaction_class_s
            m_axis_cq.tdata(126 downto 124)  <= "000";--attributes_s
            m_axis_cq.tdata(191 downto 128)  <= (others => '0'); --a read request carries no payload
            m_axis_cq.tdata(DATA_WIDTH-1 downto 192)  <= (others => '0'); --a read request carries no payload
            -- NOTE(review): only tuser(84 downto 0) is cleared here, and the 1024-bit
            -- byte-enable positions below differ from the ones used in procedure w
            -- (131:128 / 147:144). Confirm the intended 1024-bit layout before changing either.
            m_axis_cq.tuser(84 downto 0)      <= (others => '0');
            m_axis_cq.tuser(3 downto 0)      <= "1111";--first_be_s
            if DATA_WIDTH = 1024 then
                m_axis_cq.tuser(19 downto 16)     <= "1111";--last_be_s
            elsif DATA_WIDTH = 512 then
                m_axis_cq.tuser(11 downto 8)      <= "1111";--last_be_s
            elsif DATA_WIDTH = 256 then
                m_axis_cq.tuser(7 downto 4)      <= "1111";--last_be_s
            else
                error("Unsupported DATA_WIDTH " & to_string(DATA_WIDTH) & " bits");
            end if;
            m_axis_cq.tkeep(m_axis_cq.tkeep'high downto 0)      <= (others => '0'); --For 512bit mode
            m_axis_cq.tkeep(7 downto 0)      <= "00001111"; --no payload
            m_axis_cq.tvalid                 <= '1';
            m_axis_cq.tlast                  <= '1';
            --wait until rising_edge(user_clk);
            -- Poll for the completion every half clock period; wait_for_cc_tvalid
            -- marks this phase in the waveform.
            timeout := 0;
            wait_for_cc_tvalid <= '1';
            while s_axis_cc.tvalid = '0' loop
                wait for user_clk_period/2;
                timeout := timeout + 1;
                if timeout = 100 then
                    report "Timeout waiting for tvalid" severity error;
                    std.env.stop;
                    exit;
                end if;
            end loop;
            wait_for_cc_tvalid <= '0';
            -- The timeout counter deliberately keeps counting from the tvalid wait.
            while m_axis_r_cq.tready = '0' loop
                wait until rising_edge(user_clk);
                timeout := timeout + 1;
                if timeout = 100 then
                    report "Timeout waiting for tready" severity error;
                    std.env.stop;
                    exit;
                end if;
            end loop;
            m_axis_cq.tvalid                 <= '0';
            m_axis_cq.tlast                  <= '0';
            timeout := 0;
            -- The 64-bit read data follows the 96-bit completion descriptor.
            DataOut := s_axis_cc.tdata(96+63 downto 96);
        end r;

    begin
        -- UVVM housekeeping: print the control and message-ID panels, enable all log messages.
        report_global_ctrl(VOID);
        report_msg_id_panel(VOID);
        enable_log_msg(ALL_MESSAGES);
        pc_pointer_FromHost := x"000";
        last_pc_pointer_FromHost := x"000";
        tohost_switched_off := '0';
        fromhost_wraps := 0;

        wait for 18 us; --startup time
        w(REG_PC_PTR_GAP, BAR0, x"0000_0000_0000_0100"); --set pc_ptr_gap
        -- Program every ToHost descriptor i (all descriptors except the last one).
        for i in 0 to NUMBER_OF_DESCRIPTORS-2 loop
            w(REG_DESCRIPTOR_0+i*32,      BAR0, x"AA"&std_logic_vector(to_unsigned(i,8))&x"0000_0000_0000"); --descr i start address
            w(REG_DESCRIPTOR_0+i*32+8,    BAR0, x"AA"&std_logic_vector(to_unsigned(i,8))&DESCRIPTOR_SIZE); --descr i end address
            w(REG_DESCRIPTOR_0a+i*32+8,   BAR0, x"AA"&std_logic_vector(to_unsigned(i,8))&x"0000_0000_0000"); --init PC pointer at start_address
            w(REG_DESCRIPTOR_0a+i*32,     BAR0, x"0000_0000_0000_1000"+TLP_SIZE_TOHOST(i)/4); --wrap around, ToHost, size field = TLP_SIZE_TOHOST(i)/4
        end loop;
        -- Build the enable mask with one bit per descriptor.
        all_descriptors_enable := x"00";
        for i in 0 to NUMBER_OF_DESCRIPTORS -1 loop
            all_descriptors_enable(i) := '1';
        end loop;

        -- Program the last descriptor as the FromHost descriptor.
        w(REG_DESCRIPTOR_0+((NUMBER_OF_DESCRIPTORS-1)*32),      BAR0, x"DEF8_0000_0000_0000"); --descr N start address
        w(REG_DESCRIPTOR_0+8+(NUMBER_OF_DESCRIPTORS-1)*32,   BAR0, x"DEF8"&FROMHOST_DESCRIPTOR_SIZE); --set to end_address
        w(REG_DESCRIPTOR_0a+((NUMBER_OF_DESCRIPTORS-1)*32)+8,   BAR0, pc_pointer_FromHost_msb&pc_pointer_FromHost); --init PC pointer at start_address like felixcore seems to do
        --w(REG_DESCRIPTOR_0a+((NUMBER_OF_DESCRIPTORS-1)*32)+8,   BAR0, x"DEF8"&"00"&(DESCRIPTOR_SIZE(DESCRIPTOR_SIZE'high downto 2))); --init PC pointer at 1/4 end_address
        w(REG_DESCRIPTOR_0a+((NUMBER_OF_DESCRIPTORS-1)*32),     BAR0, x"0000_0000_0000_1800"+TLP_SIZE_FROMHOST/4); --wrap around, FromHost, size field = TLP_SIZE_FROMHOST/4
        -- Enable only the FromHost descriptor first. The bit position follows
        -- NUMBER_OF_DESCRIPTORS instead of being fixed to bit 4, which was only
        -- correct for five descriptors.
        RegData := (others => '0');
        RegData(NUMBER_OF_DESCRIPTORS-1) := '1';
        w(REG_DESCRIPTOR_ENABLE, BAR0, RegData); --Enable only FromHost descriptor
        w(REG_DESCRIPTOR_ENABLE, BAR0, x"0000_0000_0000_00"&all_descriptors_enable); --Enable all descriptors
        w(REG_STATUS_LEDS, BAR2, X"0000_0000_0000_00AB");
        --! ---- Removed this check, now going completely circular
        --!   wait for 1 us; --See what FromHost current_address does with pc_pointer at start_address.
        --!   r(REG_STATUS_0+(NUMBER_OF_DESCRIPTORS-1)*16, BAR0,RegData);
        --!   check_value(RegData, x"DEF8_0000_0000_0000", "FromHost current_address should stay nicely at start_address", C_SCOPE);
        --!   --Increment FromHost descriptor by a single TLP
        --!   w(REG_DESCRIPTOR_0a+((NUMBER_OF_DESCRIPTORS-1)*32)+8,   BAR0, x"DEF8_0000_0000_0020"); --init PC pointer at start_address like felixcore seems to do
        --!   wait for 1 us;
        --!   r(REG_STATUS_0+(NUMBER_OF_DESCRIPTORS-1)*16, BAR0,RegData);
        --!   check_value(RegData, x"DEF8_0000_0000_0020", "FromHost current_address should stay nicely at start_address+32", C_SCOPE);
        --!   w(REG_DESCRIPTOR_0a+((NUMBER_OF_DESCRIPTORS-1)*32)+8,   BAR0, x"DEF8_0000_0000_0060"); --init PC pointer at start_address like felixcore seems to do
        --!   wait for 1 us;
        --!   r(REG_STATUS_0+(NUMBER_OF_DESCRIPTORS-1)*16, BAR0, RegData);
        --!   check_value(RegData, x"DEF8_0000_0000_0060", "FromHost current_address should stay nicely at start_address+32", C_SCOPE);
        --!   w(REG_DESCRIPTOR_0a+((NUMBER_OF_DESCRIPTORS-1)*32)+8,   BAR0, x"DEF8"&"00"&(DESCRIPTOR_SIZE(DESCRIPTOR_SIZE'high downto 2))); --init PC pointer at 1/4 end_address
        --! ----  Removed this check, now going completely circular
        --loop
        --    r(REG_DESCRIPTOR_ENABLE, BAR0);
        --    RegData := s_axis_cc.tdata(96+63 downto 96);
        --    if(RegData(NUMBER_OF_DESCRIPTORS-1) = '0') then
        --        report "FromHost descriptor done, re-enabling";
        --        w(REG_DESCRIPTOR_ENABLE, BAR0, x"0000_0000_0000_0010"); --Enable only FromHost descriptor
        --    end if;
        --
        --end loop;
        -- Endless polling loop: service the ToHost descriptors (or switch them
        -- off once), then advance the FromHost PC pointer when the DMA caught up.
        loop
            --Wrap ToHost descriptors
            if switch_off_tohost = '0' then
                for i in 0 to NUMBER_OF_DESCRIPTORS-2 loop
                    r(REG_STATUS_0+i*16, BAR0, RegData);
                    report "current address for desc "&to_string(i)&": "&to_hstring(RegData);
                    -- The DMA is one TLP before the end of the buffer or back at the start.
                    if(RegData(47 downto 0) = DESCRIPTOR_SIZE-TLP_SIZE_TOHOST(i) or
                       RegData(47 downto 0) = x"0000_0000_0000") then
                        w(REG_DESCRIPTOR_0a+8+i*32,   BAR0, x"AA"&std_logic_vector(to_unsigned(i,8))&DESCRIPTOR_SIZE-TLP_SIZE_TOHOST(i)); --set pc pointer one TLP before end_address
                        w(REG_DESCRIPTOR_0a+8+i*32,   BAR0, x"AA"&std_logic_vector(to_unsigned(i,8))&x"0000_0000_0000"); --Wrap ToHost pc_pointer
                    end if;
                end loop;
            else
                -- Issue the disable write only once.
                if tohost_switched_off = '0' then
                    RegData := (others => '0');
                    RegData(NUMBER_OF_DESCRIPTORS-1) := '1';
                    w(REG_DESCRIPTOR_ENABLE, BAR0, RegData); --Enable only FromHost descriptor
                end if;
                tohost_switched_off := '1';
            end if;
            --read FromHost current address.
            r(REG_STATUS_0+(NUMBER_OF_DESCRIPTORS-1)*16, BAR0, RegData);
            report "current address for desc "&to_string(NUMBER_OF_DESCRIPTORS-1)&": "&to_hstring(RegData);
            if(RegData = pc_pointer_FromHost_msb&pc_pointer_FromHost) then
                -- The DMA reached the PC pointer: move it on by two TLPs (the 12-bit pointer wraps at 4096).
                last_pc_pointer_FromHost := pc_pointer_FromHost;
                pc_pointer_FromHost := pc_pointer_FromHost+TLP_SIZE_FROMHOST*2;
                report "Incrementing FromHost PC pointer to "&to_hstring(pc_pointer_FromHost);
                w(REG_DESCRIPTOR_0a+8+(NUMBER_OF_DESCRIPTORS-1)*32,   BAR0, pc_pointer_FromHost_msb&pc_pointer_FromHost); --Advance FromHost pc_pointer
                if pc_pointer_FromHost = x"000" then
                    fromhost_wraps := fromhost_wraps + 1;
                end if;
                if fromhost_wraps = 40 then
                    do_finish_fromhost <= true;
                end if;
            elsif(RegData = pc_pointer_FromHost_msb&last_pc_pointer_FromHost) then
                report "current address still needs to update, not incrementing";
            elsif(RegData > pc_pointer_FromHost_msb&last_pc_pointer_FromHost and RegData < pc_pointer_FromHost_msb&pc_pointer_FromHost) then
                report "current address has updated, but not up to pc pointer, DMA in progress";
            else
                report ("Illegal FromHost address, was: "&to_hstring(RegData)&" Expected: "
                                                 &to_hstring(pc_pointer_FromHost_msb&pc_pointer_FromHost)&" or: "
                                                 &to_hstring(pc_pointer_FromHost_msb&last_pc_pointer_FromHost));
            end if;
            wait for 0 ns;
        end loop;


    end process;
    -- The model never back-pressures the completion stream.
    s_axis_r_cc.tready <= '1';

    --! FromHost memory image. Every 64-bit lane of word i holds
    --! {i (32 bit), lane marker (24 bit), FromHostWrapCount (8 bit)}.
    --! The markers are listed most significant lane first; a 256-bit bus uses
    --! the first 4 markers, a 512-bit bus the first 8 and a 1024-bit bus all 16.
    --! For any other DATA_WIDTH the memory is left undriven.
    g_FromHostMem: for i in 0 to 127 generate
        constant LANES: natural := DATA_WIDTH/64;
        type marker_array is array (0 to 15) of std_logic_vector(23 downto 0);
        constant LANE_MARKER: marker_array := (
            x"AAAAAA", x"BBBBBB", x"CCCCCC", x"DDDDDD",
            x"EEEEEE", x"FFFFFF", x"ABABAB", x"CDCDCD",
            x"000000", x"111111", x"222222", x"333333",
            x"444444", x"555555", x"666666", x"777777");
    begin
        g_supported: if DATA_WIDTH = 256 or DATA_WIDTH = 512 or DATA_WIDTH = 1024 generate
            -- Lane 0 occupies the most significant 64 bits of the word.
            g_lane: for lane in 0 to LANES-1 generate
                FromHostMem(i)(DATA_WIDTH-1-64*lane downto DATA_WIDTH-64*(lane+1)) <=
                    std_logic_vector(to_unsigned(i, 32)) &
                    LANE_MARKER(lane) &
                    std_logic_vector(to_unsigned(FromHostWrapCount, 8));
            end generate g_lane;
        end generate g_supported;
    end generate g_FromHostMem;

    -- 256-bit RQ interface: unpack the compact s_axis_rq.tuser sideband into the
    -- width-independent axis_rq_tuser_bits record. Fields that are narrower than
    -- their record counterpart are zero-extended. The start/end-of-packet flags
    -- and pointers and seq_num1..3 are not taken from tuser at this width and are
    -- tied to zero.
    g_tuserbits_256: if DATA_WIDTH = 256 generate
        -- Byte enables and address offset, zero-extended to 16 bits each.
        axis_rq_tuser_bits.first_be            <= "000000000000" & s_axis_rq.tuser(3 downto 0);
        axis_rq_tuser_bits.last_be             <= "000000000000" & s_axis_rq.tuser(7 downto 4);
        axis_rq_tuser_bits.addr_offset         <= "0000000000000" & s_axis_rq.tuser(10 downto 8);

        -- Discontinue flag and TPH fields, zero-extended where needed.
        axis_rq_tuser_bits.discontinue         <= s_axis_rq.tuser(11);
        axis_rq_tuser_bits.tph_present         <= '0' & s_axis_rq.tuser(12);
        axis_rq_tuser_bits.tph_type            <= "00" & s_axis_rq.tuser(14 downto 13);
        axis_rq_tuser_bits.tph_indirect_tag_en <= '0' & s_axis_rq.tuser(15);
        axis_rq_tuser_bits.tph_st_tag          <= "00000000" & s_axis_rq.tuser(23 downto 16);

        -- The 6-bit sequence number is split over two tuser slices: the two
        -- upper bits come from 61..60, the four lower bits from 27..24.
        axis_rq_tuser_bits.seq_num0            <= s_axis_rq.tuser(61 downto 60) & s_axis_rq.tuser(27 downto 24);
        axis_rq_tuser_bits.seq_num1            <= (others => '0');
        axis_rq_tuser_bits.seq_num2            <= (others => '0');
        axis_rq_tuser_bits.seq_num3            <= (others => '0');

        -- 32 parity bits, zero-extended to the 128-bit record field.
        axis_rq_tuser_bits.parity              <= x"000000000000000000000000" & s_axis_rq.tuser(59 downto 28);

        -- Start-of-packet flags and pointers: tied off at this width.
        axis_rq_tuser_bits.is_sop              <= "0000";
        axis_rq_tuser_bits.is_sop0_ptr         <= "00";
        axis_rq_tuser_bits.is_sop1_ptr         <= "00";
        axis_rq_tuser_bits.is_sop2_ptr         <= "00";
        axis_rq_tuser_bits.is_sop3_ptr         <= "00";

        -- End-of-packet flags and pointers: tied off at this width.
        axis_rq_tuser_bits.is_eop              <= "0000";
        axis_rq_tuser_bits.is_eop0_ptr         <= "00000";
        axis_rq_tuser_bits.is_eop1_ptr         <= "00000";
        axis_rq_tuser_bits.is_eop2_ptr         <= "00000";
        axis_rq_tuser_bits.is_eop3_ptr         <= "00000";
    end generate;
    -- 512-bit RQ interface: unpack s_axis_rq.tuser into the width-independent
    -- axis_rq_tuser_bits record. At this width tuser carries two start-of-packet
    -- and two end-of-packet slots and two sequence numbers; narrower fields are
    -- zero-extended and the unused third/fourth slots are tied to zero.
    g_tuserbits_512: if DATA_WIDTH = 512 generate
        axis_rq_tuser_bits.first_be            <= x"00"&s_axis_rq.tuser(7 downto 0)   ;
        axis_rq_tuser_bits.last_be             <= x"00"&s_axis_rq.tuser(15 downto 8)  ;
        axis_rq_tuser_bits.addr_offset         <= x"000"&s_axis_rq.tuser(19 downto 16) ;
        axis_rq_tuser_bits.is_sop              <= "00"&s_axis_rq.tuser(21 downto 20) ;
        axis_rq_tuser_bits.is_sop0_ptr         <= s_axis_rq.tuser(23 downto 22) ;
        axis_rq_tuser_bits.is_sop1_ptr         <= s_axis_rq.tuser(25 downto 24) ;
        axis_rq_tuser_bits.is_sop2_ptr         <= (others => '0') ;
        axis_rq_tuser_bits.is_sop3_ptr         <= (others => '0') ;
        axis_rq_tuser_bits.is_eop              <= "00"&s_axis_rq.tuser(27 downto 26) ;
        axis_rq_tuser_bits.is_eop0_ptr         <= '0'&s_axis_rq.tuser(31 downto 28) ;
        axis_rq_tuser_bits.is_eop1_ptr         <= '0'&s_axis_rq.tuser(35 downto 32) ;
        axis_rq_tuser_bits.is_eop2_ptr         <= (others => '0') ;
        axis_rq_tuser_bits.is_eop3_ptr         <= (others => '0') ;
        axis_rq_tuser_bits.discontinue         <= s_axis_rq.tuser(36)           ;
        axis_rq_tuser_bits.tph_present         <= s_axis_rq.tuser(38 downto 37) ;
        axis_rq_tuser_bits.tph_type            <= s_axis_rq.tuser(42 downto 39) ;
        axis_rq_tuser_bits.tph_indirect_tag_en <= s_axis_rq.tuser(44 downto 43) ;
        axis_rq_tuser_bits.tph_st_tag          <= s_axis_rq.tuser(60 downto 45) ;
        axis_rq_tuser_bits.seq_num0            <= s_axis_rq.tuser(66 downto 61) ;
        axis_rq_tuser_bits.seq_num1            <= s_axis_rq.tuser(72 downto 67) ;
        -- Only seq_num0/1 are sourced from tuser at this width. seq_num2/3 must
        -- be tied off here: driving seq_num1 again would add extra concurrent
        -- drivers to it and leave seq_num2/3 without any driver.
        axis_rq_tuser_bits.seq_num2            <= (others => '0') ;
        axis_rq_tuser_bits.seq_num3            <= (others => '0') ;
        -- 64 parity bits, zero-extended to the 128-bit record field.
        axis_rq_tuser_bits.parity              <= x"0000_0000_0000_0000"&s_axis_rq.tuser(136 downto 73);
    end generate;
    -- 1024-bit RQ interface: unpack s_axis_rq.tuser into the width-independent
    -- axis_rq_tuser_bits record. All four start-of-packet / end-of-packet slots
    -- and all four sequence numbers are taken from tuser; the TPH fields are
    -- tied to zero and the 76 parity bits are zero-extended to 128 bits.
    g_tuserbits_1024: if DATA_WIDTH = 1024 generate
        -- Byte enables and address offset.
        axis_rq_tuser_bits.first_be            <= s_axis_rq.tuser(15 downto 0);
        axis_rq_tuser_bits.last_be             <= s_axis_rq.tuser(31 downto 16);
        axis_rq_tuser_bits.addr_offset         <= s_axis_rq.tuser(47 downto 32);

        -- Start-of-packet flags followed by the four 2-bit pointers.
        axis_rq_tuser_bits.is_sop              <= s_axis_rq.tuser(51 downto 48);
        axis_rq_tuser_bits.is_sop0_ptr         <= s_axis_rq.tuser(53 downto 52);
        axis_rq_tuser_bits.is_sop1_ptr         <= s_axis_rq.tuser(55 downto 54);
        axis_rq_tuser_bits.is_sop2_ptr         <= s_axis_rq.tuser(57 downto 56);
        axis_rq_tuser_bits.is_sop3_ptr         <= s_axis_rq.tuser(59 downto 58);

        -- End-of-packet flags followed by the four 5-bit pointers.
        axis_rq_tuser_bits.is_eop              <= s_axis_rq.tuser(63 downto 60);
        axis_rq_tuser_bits.is_eop0_ptr         <= s_axis_rq.tuser(68 downto 64);
        axis_rq_tuser_bits.is_eop1_ptr         <= s_axis_rq.tuser(73 downto 69);
        axis_rq_tuser_bits.is_eop2_ptr         <= s_axis_rq.tuser(78 downto 74);
        axis_rq_tuser_bits.is_eop3_ptr         <= s_axis_rq.tuser(83 downto 79);

        axis_rq_tuser_bits.discontinue         <= s_axis_rq.tuser(84);

        -- TPH fields are not taken from tuser at this width.
        axis_rq_tuser_bits.tph_present         <= "00";
        axis_rq_tuser_bits.tph_type            <= "0000";
        axis_rq_tuser_bits.tph_indirect_tag_en <= "00";
        axis_rq_tuser_bits.tph_st_tag          <= x"0000";

        -- Four 6-bit sequence numbers.
        axis_rq_tuser_bits.seq_num0            <= s_axis_rq.tuser(354 downto 349);
        axis_rq_tuser_bits.seq_num1            <= s_axis_rq.tuser(360 downto 355);
        axis_rq_tuser_bits.seq_num2            <= s_axis_rq.tuser(366 downto 361);
        axis_rq_tuser_bits.seq_num3            <= s_axis_rq.tuser(372 downto 367);

        -- 76 parity bits in the low part of the field, upper 52 bits zero.
        axis_rq_tuser_bits.parity              <= x"0000000000000" & s_axis_rq.tuser(448 downto 373);
    end generate;

    -- response_proc: host-side model for the requester request (RQ) and
    -- requester completion (RC) AXI-Stream interfaces.
    --
    -- Clocked on the rising edge of user_clk; sys_rst_n is an asynchronous,
    -- active-low reset that only clears RqStraddlePosition and TlpSize.
    --
    -- On every accepted RQ beat (s_axis_rq.tvalid and s_axis_r_rq.tready high):
    --   * TlpSize (in DWORDs) is tracked and checked against the accepted sizes;
    --     it is used for checking/logging only.
    --   * On the first beat of a TLP the 128-bit request header is decoded.
    --     Requests of type "0001" (write) have their payload stored in
    --     ToHostMem; requests of type "0000" (read) are queued in
    --     FromHostIndexes / FromHostAddresses / FromHostDwordCounts.
    --
    -- While m_axis_r_rc.tready is high, queued read requests are answered on
    -- m_axis_rc with data from FromHostMem, and the sideband record
    -- axis_rc_tuser_bits is packed into m_axis_rc.tuser according to DATA_WIDTH.
    response_proc: process(user_clk, sys_rst_n)
        variable ToHost_tlp_busy: std_logic := '0';
        variable rq_header: std_logic_vector(127 downto 0);
        variable address: std_logic_vector(63 downto 0);
        variable address_out_of_order: std_logic_vector(63 downto 0);
        variable dword_count: std_logic_vector(10 downto 0);
        variable request_type: std_logic_vector(3 downto 0); --"0001" for write, "0000" for read.
        variable ToHost_pipe_data: std_logic_vector(DATA_WIDTH-129 downto 0);
        variable TLPsToSend: integer range 0 to 65536:= 0;
        variable FromHostIndexes: IntArray_type(0 to 255);
        variable FromHostAddresses: slv64_arr(0 to 255);
        variable FromHostDwordCounts: slv11_arr(0 to 255);
        variable ToHostIndex: integer range 0 to 127;
        variable FromHost_pipe_data: std_logic_vector((DATA_WIDTH/128-1)*128+95 downto 0);
        variable TlpIndex: integer range 0 to 127:= 0;
        variable ToHostMemorySelect: integer range 0 to 3;
        variable IncreaseFromHostWrapCount: std_logic;
        variable RcStraddlePosition: integer range 0 to DATA_WIDTH/128-1;
        variable RqStraddlePosition: integer range 0 to DATA_WIDTH/256-1;
        variable rc_header: std_logic_vector(95 downto 0);
        variable axis_rc_tuser_bits: axis_rc_tuser_bits_type;
        variable TlpSize: integer;
    begin
        if sys_rst_n = '0' then
            --responding <= '1';
            RqStraddlePosition := 0;
            TlpSize := 0;
        elsif rising_edge(user_clk) then
            -- Per-cycle defaults; DoCompare and ClearToHostMem are only raised further down.
            IncreaseFromHostWrapCount := '0';
            DoCompare <= '0';
            ClearToHostMem <= '0';
            -- ClearToHostMem is a signal, so this test sees the value scheduled in the
            -- previous clock cycle: the selected ToHost memory is set to 'U' one cycle
            -- after the request with ToHostIndex = 0 was decoded.
            if ClearToHostMem = '1' then
                for i in 0 to 127 loop
                    ToHostMem(ToHostMemorySelect)(i) <= (others => 'U');
                end loop;
            end if;
            if s_axis_rq.tvalid = '1' and s_axis_r_rq.tready = '1' then
                --Calculate the TLP Size (For Gen5+straddling). In DWORDS (32b), only for checking the TLP size, nothing else is done with TlpSize (message size is taken from the TLP header instead).
                if DATA_WIDTH=1024 then
                    if axis_rq_tuser_bits.is_eop = "0001" then
                        if axis_rq_tuser_bits.is_sop = "0001" then
                            if axis_rq_tuser_bits.is_sop0_ptr = "00" and axis_rq_tuser_bits.is_eop0_ptr = "00011" then --Read request at pos 0, TLP Size is 0
                                TlpSize := -4;
                            elsif axis_rq_tuser_bits.is_sop0_ptr = "01" and axis_rq_tuser_bits.is_eop0_ptr = "01011" then --Read request at pos 1, TLP Size is 0
                                TlpSize := -12;
                            elsif axis_rq_tuser_bits.is_sop0_ptr = "10" and axis_rq_tuser_bits.is_eop0_ptr = "10011" then --Read request at pos 2, TLP Size is 0
                                TlpSize := -20;
                            elsif axis_rq_tuser_bits.is_sop0_ptr = "11" and axis_rq_tuser_bits.is_eop0_ptr = "11011" then --Read request at pos 3, TLP Size is 0
                                TlpSize := -28;
                            end if;
                        end if;
                        -- Add the DWORDs of the ending TLP that are present in this beat (eop pointer is zero based).
                        TlpSize := TlpSize + to_integer(unsigned(axis_rq_tuser_bits.is_eop0_ptr))+1;
                        log(ID_LOG_HDR,"-TLP Size received: "&to_string(TlpSize*4)&" bytes", C_SCOPE);
                        if TlpSize /= 128 and TlpSize /= 64 and TlpSize /= 32 and TlpSize /= 16 then
                            if TlpSize = 0 and s_axis_rq.tdata(78 + (to_integer(unsigned(axis_rq_tuser_bits.is_sop0_ptr)) * 256) downto 75 + (to_integer(unsigned(axis_rq_tuser_bits.is_sop0_ptr)) * 256)) = "0000" then
                                log(ID_LOG_HDR,"TLP Size 0, read request", C_SCOPE);
                            else
                                error("Wrong TLP Size: " & to_string(TlpSize*4) & " bytes");
                            end if;
                        end if;
                    end if;

                    -- Count a full beat (32 DWORDs); overridden below when a TLP starts in this beat.
                    TlpSize := TlpSize + 32;
                    if axis_rq_tuser_bits.is_sop = "0001" then
                        if axis_rq_tuser_bits.is_sop0_ptr = "00" then
                            TlpSize := 28;
                        elsif axis_rq_tuser_bits.is_sop0_ptr = "01" then
                            TlpSize := 20;
                        elsif axis_rq_tuser_bits.is_sop0_ptr = "10" then
                            TlpSize := 12;
                        elsif axis_rq_tuser_bits.is_sop0_ptr = "11" then
                            TlpSize := 4;
                        else
                            error("Wrong sop0_ptr" & to_hstring(axis_rq_tuser_bits.is_sop0_ptr));
                        end if;
                    end if;
                --Calculate the TLP Size (For Gen4+straddling). In DWORDS (32b)
                elsif DATA_WIDTH=512 then
                    if axis_rq_tuser_bits.is_eop = "0001" then
                        if axis_rq_tuser_bits.is_sop = "0001" then
                            if axis_rq_tuser_bits.is_sop0_ptr = "00" and axis_rq_tuser_bits.is_eop0_ptr = "00011" then --Read request at pos 0, TLP Size is 0
                                TlpSize := -4;
                            elsif axis_rq_tuser_bits.is_sop0_ptr = "10" and axis_rq_tuser_bits.is_eop0_ptr = "01011" then --Read request at pos 1, TLP Size is 0
                                TlpSize := -12;
                            end if;
                        end if;
                        -- Add the DWORDs of the ending TLP that are present in this beat (eop pointer is zero based).
                        TlpSize := TlpSize + to_integer(unsigned(axis_rq_tuser_bits.is_eop0_ptr))+1;
                        log(ID_LOG_HDR,"TLP Size received: "&to_string(TlpSize*4)&" bytes", C_SCOPE);
                        if TlpSize /= 128 and TlpSize /= 64 and TlpSize /= 32 and TlpSize /= 16 then
                            if TlpSize = 0 and s_axis_rq.tdata(78 + (to_integer(unsigned(axis_rq_tuser_bits.is_sop0_ptr)) * 128) downto 75 + (to_integer(unsigned(axis_rq_tuser_bits.is_sop0_ptr)) * 128)) = "0000" then
                                log(ID_LOG_HDR,"TLP Size 0, read request", C_SCOPE);
                            else
                                error("Wrong TLP Size: " & to_string(TlpSize*4) & " bytes");
                            end if;
                        end if;

                    end if;

                    -- Count a full beat (16 DWORDs); overridden below when a TLP starts in this beat.
                    TlpSize := TlpSize + 16;
                    if axis_rq_tuser_bits.is_sop = "0001" then
                        if axis_rq_tuser_bits.is_sop0_ptr = "00" then
                            TlpSize := 12;
                        elsif axis_rq_tuser_bits.is_sop0_ptr = "10" then
                            TlpSize := 4;
                        else
                            error("Wrong sop0_ptr" & to_hstring(axis_rq_tuser_bits.is_sop0_ptr));
                        end if;
                    end if;
                elsif DATA_WIDTH = 256 then--TLP size calculation for 256b.
                    -- Every beat without tlast adds 8 DWORDs; the size is checked and cleared on the tlast beat.
                    -- NOTE(review): the accepted sizes here (128, 64, 16, 8) differ from the 512b/1024b
                    -- lists (128, 64, 32, 16) - confirm that this difference is intended.
                    if s_axis_rq.tlast = '0' then
                        TlpSize := TlpSize + 8;
                    else
                        if TlpSize /= 128 and TlpSize /= 64 and TlpSize /= 16 and TlpSize /= 8 then
                            if TlpSize = 0 and s_axis_rq.tdata(78 downto 75) = "0000" then
                                log(ID_LOG_HDR,"TLP Size 0, read request", C_SCOPE);
                            else
                                error("Wrong TLP Size: " & to_string(TlpSize*4) & " bytes");
                            end if;
                        end if;
                        TlpSize := 0;
                    end if;
                else
                    error("Unsupported DATA_WIDTH " & to_string(DATA_WIDTH) & " bits");
                end if;
                --End of TLP size calculation


                if (ToHost_tlp_busy = '0' and DATA_WIDTH=256) or
                   (axis_rq_tuser_bits.is_sop /= "0000" and (DATA_WIDTH=512 or DATA_WIDTH=1024)) then --first word, decode everything from header.
                    -- An end-of-packet is flagged in the same beat as a start at a non-zero
                    -- position: store the remaining data in host memory before the new
                    -- header is decoded (RqStraddlePosition still holds the previous value here).
                    if axis_rq_tuser_bits.is_eop /= "0000" and axis_rq_tuser_bits.is_sop0_ptr /= "00" then
                        ToHostMem(ToHostMemorySelect)(ToHostIndex) <=  s_axis_rq.tdata(127+256*RqStraddlePosition downto 0) & ToHost_pipe_data(DATA_WIDTH-129-(256*RqStraddlePosition) downto 0); --write the TLP in the host memory.
                        ToHost_pipe_data(DATA_WIDTH-129-(256*RqStraddlePosition) downto 0) := s_axis_rq.tdata(DATA_WIDTH-1 downto 128+256*RqStraddlePosition);
                        if ToHostIndex < 127 then
                            ToHostIndex := ToHostIndex + 1;
                        else
                            DoCompare <= '1';
                            ToHostMemorySelect_s <= ToHostMemorySelect;
                        end if;
                    end if;
                    if axis_rq_tuser_bits.is_sop = "0001" then --single TLP starts here, but only used in 512b mode.
                        if DATA_WIDTH < 1024 then
                            RqStraddlePosition := to_integer(unsigned(axis_rq_tuser_bits.is_sop0_ptr(1 downto 1))); --is_sop0_ptr is either "10" or "00". "01" and "11" are reserved. for 512b
                        else
                            RqStraddlePosition := to_integer(unsigned(axis_rq_tuser_bits.is_sop0_ptr(1 downto 0))); --1024b: both bits of is_sop0_ptr are used, the values "00" to "11" give straddle position 0 to 3.
                        end if;
                    else
                        RqStraddlePosition := 0;
                    end if;
                    -- The 128-bit request header is located at a 256-bit aligned offset given by the straddle position.
                    rq_header := s_axis_rq.tdata(127+256*RqStraddlePosition downto 256*RqStraddlePosition);
                    address := rq_header(63 downto 2) & "00";
                    check_value(address(47 downto 16), x"0000_0000", ERROR, "Request address bits 47..16 must be 0", C_SCOPE);
                    -- Addresses with x"AA0" in bits 63..52 select the ToHost memory through bits 51..48.
                    if(address(63 downto 52) = x"AA0") then
                        ToHostMemorySelect := to_integer(unsigned(address(51 downto 48)));
                    end if;
                    dword_count := rq_header(74 downto 64);
                    request_type := rq_header(78 downto 75);
                    -- Keep the data behind the header; it is combined with the next beat when written to ToHostMem.
                    ToHost_pipe_data(DATA_WIDTH-129-(256*RqStraddlePosition) downto 0) := s_axis_rq.tdata(DATA_WIDTH-1 downto 128+256*RqStraddlePosition);
                    if request_type = "0000" then
                        --! Swap odd and even TLPs to emulate out of order TLP transmission over PCIe, to reproduce FLX-2183
                        address_out_of_order := address;
                        address_out_of_order(f_log2(TLP_SIZE_FROMHOST)) := not address(f_log2(TLP_SIZE_FROMHOST));

                        -- Queue the read request; it is answered on the RC interface further down.
                        FromHostIndexes(TLPsToSend) := to_integer(unsigned(address_out_of_order(f_log2(DATA_WIDTH)+3 downto f_log2(DATA_WIDTH)-3)));
                        FromHostAddresses(TLPsToSend) := address_out_of_order;
                        FromHostDwordCounts(TLPsToSend) := dword_count;

                        log("FromHostIndex: "&to_string(FromHostIndexes(TLPsToSend))& " address: "&to_hstring(address_out_of_order)&" bytes: "&to_string(to_integer(unsigned(dword_count))*4), C_SCOPE);

                        TLPsToSend := TLPsToSend + 1;

                    end if;
                    if request_type = "0001" then
                        -- Write request: derive the ToHostMem word index from the address.
                        ToHostIndex := to_integer(unsigned(address(f_log2(DATA_WIDTH)+3 downto f_log2(DATA_WIDTH)-3)));
                        log("ToHostIndex: "&to_string(ToHostIndex)& " for memory: "&to_string(ToHostMemorySelect)&" address: "&to_hstring(address), C_SCOPE);
                        -- Index 0 marks a wrap-around: count it and request a clear of the selected memory.
                        if ToHostIndex = 0 then
                            ToHostWrapCount <= ToHostWrapCount + 1;
                            ClearToHostMem <= '1';
                        end if;

                    end if;
                    ToHost_tlp_busy := '1';
                else
                    -- Continuation beat: combine the data kept from the previous beat with this beat and store it.
                    ToHostMem(ToHostMemorySelect)(ToHostIndex) <=  s_axis_rq.tdata(127+256*RqStraddlePosition downto 0) & ToHost_pipe_data(DATA_WIDTH-129-(256*RqStraddlePosition) downto 0); --write the TLP in the host memory.
                    ToHost_pipe_data(DATA_WIDTH-129-(256*RqStraddlePosition) downto 0) := s_axis_rq.tdata(DATA_WIDTH-1 downto 128+256*RqStraddlePosition);
                    if ToHostIndex < 127 then
                        ToHostIndex := ToHostIndex + 1;
                    else
                        -- Last word of the memory written: trigger the comparison for this memory.
                        DoCompare <= '1';
                        ToHostMemorySelect_s <= ToHostMemorySelect;
                    end if;
                end if;
                if s_axis_rq.tlast = '1' then --only for 256b operation.
                    ToHost_tlp_busy := '0';
                end if;

            end if;

            --tkeep and tlast are ignored with RC frame straddle.
            m_axis_rc.tlast <= '0';
            m_axis_rc.tkeep <= (others => '1');


            if(m_axis_r_rc.tready = '1') then
                -- Defaults for this beat; tvalid/tdata are overridden below when there is something to send.
                -- Note that tkeep is driven to all zeros here, overriding the all-ones default above.
                m_axis_rc.tvalid <= '0';
                m_axis_rc.tdata <= (others => '0');
                m_axis_rc.tuser <= (others => '0');
                m_axis_rc.tkeep <= (others => '0');
                axis_rc_tuser_bits.byte_en(127 downto DATA_WIDTH/8) := (others => '0');
                axis_rc_tuser_bits.byte_en(DATA_WIDTH/8-1 downto 0) := (others => '1');
                axis_rc_tuser_bits.is_sop := "00000000";
                axis_rc_tuser_bits.is_sop0_ptr := "000";
                axis_rc_tuser_bits.is_sop1_ptr := "000";
                axis_rc_tuser_bits.is_sop2_ptr := "000";
                axis_rc_tuser_bits.is_sop3_ptr := "000";
                axis_rc_tuser_bits.is_sop4_ptr := "000";
                axis_rc_tuser_bits.is_sop5_ptr := "000";
                axis_rc_tuser_bits.is_sop6_ptr := "000";
                axis_rc_tuser_bits.is_sop7_ptr := "000";
                axis_rc_tuser_bits.is_eop := "00000000";
                axis_rc_tuser_bits.is_eop0_ptr := "00000";
                axis_rc_tuser_bits.is_eop1_ptr := "00000";
                axis_rc_tuser_bits.is_eop2_ptr := "00000";
                axis_rc_tuser_bits.is_eop3_ptr := "00000";
                axis_rc_tuser_bits.is_eop4_ptr := "00000";
                axis_rc_tuser_bits.is_eop5_ptr := "00000";
                axis_rc_tuser_bits.is_eop6_ptr := "00000";
                axis_rc_tuser_bits.is_eop7_ptr := "00000";
                axis_rc_tuser_bits.discontinue := '0';
                axis_rc_tuser_bits.parity := (others => '0');


                if TLPsToSend > 0 and TlpIndex = 0 then --Transmit the RC header, and if we need to finish a straddled TLP, finish that too.
                    log("TlpIndex: (TlpIndex = 0): "&to_string(TlpIndex), C_SCOPE);
                    if RcStraddlePosition /= 0 then
                        axis_rc_tuser_bits.is_eop := "00000001";
                        axis_rc_tuser_bits.is_eop0_ptr := std_logic_vector(to_unsigned(4*RcStraddlePosition+2,5));
                        m_axis_rc.tdata(128*RcStraddlePosition-1 downto 0) <= x"0000_0000"& FromHost_pipe_data(128*(RcStraddlePosition-1)+95 downto 0);
                    end if;
                    rc_header(95 downto 0) := (others => '0'); --only dword_count and address are used.
                    rc_header(42 downto 32) := FromHostDwordCounts(0);
                    rc_header(11 downto 0) := FromHostAddresses(0)(11 downto 0);
                    -- 96-bit header at the straddle position, followed by the first part of the data word;
                    -- the rest of the data word is kept in FromHost_pipe_data for the next beat.
                    m_axis_rc.tdata(128*RcStraddlePosition+95 downto 128*RcStraddlePosition) <= rc_header;
                    m_axis_rc.tdata(DATA_WIDTH-1 downto 128*RcStraddlePosition+96) <= FromHostMem(FromHostIndexes(0))(DATA_WIDTH-97-(128*RcStraddlePosition) downto 0);
                    FromHost_pipe_data(128*RcStraddlePosition+95 downto 0) := FromHostMem(FromHostIndexes(0))(DATA_WIDTH-1 downto DATA_WIDTH-96-(128*RcStraddlePosition));

                    axis_rc_tuser_bits.is_sop := "00000001"; --Single TLP starting in this beat.
                    axis_rc_tuser_bits.is_sop0_ptr := std_logic_vector(to_unsigned(RcStraddlePosition,3));
                    m_axis_rc.tvalid <= '1';
                    -- Number of remaining beats: the dword count divided by the number of DWORDs per beat.
                    TlpIndex := to_integer(unsigned(FromHostDwordCounts(0)(10 downto f_log2(DATA_WIDTH)-5)));
                    log("TlpIndex: (fhwc): "&to_string(TlpIndex), C_SCOPE);
                    if RcStraddlePosition /= DATA_WIDTH/128-1 then
                        TlpIndex := TlpIndex-1; --One beat more for a TLP if we start in the last straddle position.
                    end if;
                    --! Check that we are at the end of the second-last TLP, since we swap odd and even TLPs to emulate out of order TLP transmission over PCIe, to reproduce FLX-2183
                    if FromHostIndexes(0) = 127 - ((TLP_SIZE_FROMHOST*8)/DATA_WIDTH) then
                        IncreaseFromHostWrapCount := '1';
                    end if;
                    if(FromHostIndexes(0) < 127) then
                        FromHostIndexes(0) := FromHostIndexes(0) + 1;
                    end if;

                --responding <= '1';
                elsif TlpIndex > 0 then --Send everything except the last beat, or all beats if we can straddle the last beat completely.
                    log("TlpIndex: (elsif TlpIndex >0): "&to_string(TlpIndex), C_SCOPE);
                    m_axis_rc.tdata(128*RcStraddlePosition+95 downto 0) <= FromHost_pipe_data(128*RcStraddlePosition+95 downto 0);
                    m_axis_rc.tvalid <= '1';
                    m_axis_rc.tdata(DATA_WIDTH-1 downto 128*RcStraddlePosition+96) <= FromHostMem(FromHostIndexes(0))(DATA_WIDTH-(97+128*RcStraddlePosition) downto 0);
                    FromHost_pipe_data(128*RcStraddlePosition+95 downto 0) := FromHostMem(FromHostIndexes(0))(DATA_WIDTH-1 downto DATA_WIDTH-(128*RcStraddlePosition+96));
                    --! Check that we are at the end of the second-last TLP, since we swap odd and even TLPs to emulate out of order TLP transmission over PCIe, to reproduce FLX-2183
                    if FromHostIndexes(0) = 127 - ((TLP_SIZE_FROMHOST*8)/DATA_WIDTH) then
                        IncreaseFromHostWrapCount := '1';
                    end if;

                    if(FromHostIndexes(0) < 127 and FromHostDwordCounts(0) > (DATA_WIDTH/32)) then
                        FromHostIndexes(0) := FromHostIndexes(0) + 1;
                    end if;
                    if TlpIndex = 1 then
                        if(RcStraddlePosition = (DATA_WIDTH/128-1)) then --If we can complete a complete straddle in this beat, include eop info
                            axis_rc_tuser_bits.is_eop := "00000001";
                            axis_rc_tuser_bits.is_eop0_ptr := std_logic_vector(to_unsigned(4*RcStraddlePosition+2,5));

                        end if;
                        -- Advance the straddle position for the next completion, wrapping to 0 after the last position.
                        if RcStraddlePosition < DATA_WIDTH/128-1 then
                            RcStraddlePosition := RcStraddlePosition + 1;
                        else
                            RcStraddlePosition := 0;
                        end if;
                        if IncreaseFromHostWrapCount = '1' then
                            FromHostWrapCount <= FromHostWrapCount + 1;
                            IncreaseFromHostWrapCount := '0';
                        end if;
                        --responding <= '1';
                        -- This request is done: remove it by shifting the queued requests one position down.
                        TLPsToSend := TLPsToSend - 1 ;
                        for i in 1 to TLPsToSend loop
                            FromHostIndexes(i-1) := FromHostIndexes(i);
                            FromHostAddresses(i-1) := FromHostAddresses(i);
                            FromHostDwordCounts(i-1) := FromHostDwordCounts(i);
                        end loop;


                    end if;

                    TlpIndex := TlpIndex - 1;
                elsif RcStraddlePosition /= 0 then --Not starting a new TLP, finish the pipeline and set RcStraddlePosition to 0.
                    log("TlpIndex: (else): "&to_string(TlpIndex), C_SCOPE);

                    axis_rc_tuser_bits.is_eop := "00000001";
                    axis_rc_tuser_bits.is_eop0_ptr := std_logic_vector(to_unsigned(4*RcStraddlePosition+2,5));
                    m_axis_rc.tdata(128*RcStraddlePosition-1 downto 0) <= x"0000_0000"& FromHost_pipe_data(128*(RcStraddlePosition-1)+95 downto 0);
                    m_axis_rc.tvalid <= '1';
                    RcStraddlePosition := 0;
                end if;

                -- Pack the sideband record into m_axis_rc.tuser; the bit layout depends on DATA_WIDTH.
                if DATA_WIDTH = 256 then
                    m_axis_rc.tuser(31 downto 0) <= axis_rc_tuser_bits.byte_en(31 downto 0);
                    m_axis_rc.tuser(33 downto 32) <= axis_rc_tuser_bits.is_sop(1 downto 0);
                    m_axis_rc.tuser(34) <= axis_rc_tuser_bits.is_eop(0);
                    m_axis_rc.tuser(38) <= axis_rc_tuser_bits.is_eop(1);
                    m_axis_rc.tuser(37 downto 35) <= axis_rc_tuser_bits.is_eop0_ptr(2 downto 0);
                    m_axis_rc.tuser(41 downto 39) <= axis_rc_tuser_bits.is_eop1_ptr(2 downto 0);
                    m_axis_rc.tuser(42) <= axis_rc_tuser_bits.discontinue;
                    m_axis_rc.tuser(74 downto 43) <= axis_rc_tuser_bits.parity(31 downto 0);
                elsif DATA_WIDTH = 512 then --512b
                    m_axis_rc.tuser(63 downto 0)   <= axis_rc_tuser_bits.byte_en(63 downto 0);
                    m_axis_rc.tuser(67 downto 64)  <= axis_rc_tuser_bits.is_sop(3 downto 0);
                    m_axis_rc.tuser(69 downto 68)  <= axis_rc_tuser_bits.is_sop0_ptr(1 downto 0) ;
                    m_axis_rc.tuser(71 downto 70)  <= axis_rc_tuser_bits.is_sop1_ptr(1 downto 0) ;
                    m_axis_rc.tuser(73 downto 72)  <= axis_rc_tuser_bits.is_sop2_ptr(1 downto 0) ;
                    m_axis_rc.tuser(75 downto 74)  <= axis_rc_tuser_bits.is_sop3_ptr(1 downto 0) ;
                    m_axis_rc.tuser(79 downto 76)  <= axis_rc_tuser_bits.is_eop(3 downto 0)      ;
                    m_axis_rc.tuser(83 downto 80)  <= axis_rc_tuser_bits.is_eop0_ptr(3 downto 0) ;
                    m_axis_rc.tuser(87 downto 84)  <= axis_rc_tuser_bits.is_eop1_ptr(3 downto 0) ;
                    m_axis_rc.tuser(91 downto 88)  <= axis_rc_tuser_bits.is_eop2_ptr(3 downto 0) ;
                    m_axis_rc.tuser(95 downto 92)  <= axis_rc_tuser_bits.is_eop3_ptr(3 downto 0) ;
                    m_axis_rc.tuser(96)            <= axis_rc_tuser_bits.discontinue ;
                    m_axis_rc.tuser(160 downto 97) <= axis_rc_tuser_bits.parity(63 downto 0)     ;
                elsif DATA_WIDTH = 1024 then
                    m_axis_rc.tuser(127 downto 0)   <= axis_rc_tuser_bits.byte_en(127 downto 0);
                    m_axis_rc.tuser(135 downto 128)  <= axis_rc_tuser_bits.is_sop(7 downto 0);
                    m_axis_rc.tuser(138 downto 136)  <= axis_rc_tuser_bits.is_sop0_ptr(2 downto 0) ;
                    m_axis_rc.tuser(141 downto 139)  <= axis_rc_tuser_bits.is_sop1_ptr(2 downto 0) ;
                    m_axis_rc.tuser(144 downto 142)  <= axis_rc_tuser_bits.is_sop2_ptr(2 downto 0) ;
                    m_axis_rc.tuser(147 downto 145)  <= axis_rc_tuser_bits.is_sop3_ptr(2 downto 0) ;
                    m_axis_rc.tuser(150 downto 148)  <= axis_rc_tuser_bits.is_sop4_ptr(2 downto 0) ;
                    m_axis_rc.tuser(153 downto 151)  <= axis_rc_tuser_bits.is_sop5_ptr(2 downto 0) ;
                    m_axis_rc.tuser(156 downto 154)  <= axis_rc_tuser_bits.is_sop6_ptr(2 downto 0) ;
                    m_axis_rc.tuser(159 downto 157)  <= axis_rc_tuser_bits.is_sop7_ptr(2 downto 0) ;
                    m_axis_rc.tuser(167 downto 160)  <= axis_rc_tuser_bits.is_eop(7 downto 0)      ;
                    m_axis_rc.tuser(172 downto 168)  <= axis_rc_tuser_bits.is_eop0_ptr(4 downto 0) ;
                    m_axis_rc.tuser(177 downto 173)  <= axis_rc_tuser_bits.is_eop1_ptr(4 downto 0) ;
                    m_axis_rc.tuser(182 downto 178)  <= axis_rc_tuser_bits.is_eop2_ptr(4 downto 0) ;
                    m_axis_rc.tuser(187 downto 183)  <= axis_rc_tuser_bits.is_eop3_ptr(4 downto 0) ;
                    m_axis_rc.tuser(192 downto 188)  <= axis_rc_tuser_bits.is_eop4_ptr(4 downto 0) ;
                    m_axis_rc.tuser(197 downto 193)  <= axis_rc_tuser_bits.is_eop5_ptr(4 downto 0) ;
                    m_axis_rc.tuser(202 downto 198)  <= axis_rc_tuser_bits.is_eop6_ptr(4 downto 0) ;
                    m_axis_rc.tuser(207 downto 203)  <= axis_rc_tuser_bits.is_eop7_ptr(4 downto 0) ;
                    m_axis_rc.tuser(208)            <= axis_rc_tuser_bits.discontinue ;
                    m_axis_rc.tuser(336 downto 209) <= axis_rc_tuser_bits.parity(127 downto 0)     ;
                else
                    error("Unsupported DATA_WIDTH " & to_string(DATA_WIDTH) & " bits");
                end if;
            end if; --tready
            -- Mirror the straddle positions (process variables) onto signals.
            RcStraddlePosition_s <= RcStraddlePosition;
            RqStraddlePosition_s <= RqStraddlePosition;
        end if;
    end process;

    -- compare_proc: verifies the contents of the ToHost memories against the
    -- expected counter pattern and decides when the simulation may finish.
    --
    -- Every time the process is triggered with DoCompare = '1' (and the ToHost
    -- side has not finished yet), words 0 to 127 of ToHostMem(ToHostMemorySelect_s)
    -- are compared with a DATA_WIDTH wide reference word. The reference word is
    -- built from 64-bit lanes, each lane being a 32-bit filler constant followed
    -- by the 32-bit counter of the selected memory; in the most significant lane
    -- the upper 8 bits of the filler are replaced by the memory index.
    -- The counter of the selected memory is incremented after every word.
    --
    -- The ToHost side is considered finished once the counter of every memory
    -- has reached FINAL_CNT_VAL; switch_off_tohost is then driven to '1'.
    -- When do_finish_fromhost is true as well, the final alert report is printed
    -- and user_lnk_up is driven to '0'.
    compare_proc: process(ToHostMem, ToHostMemorySelect_s, DoCompare, do_finish_fromhost)
        type slv32_array is array(0 to NUMBER_OF_DESCRIPTORS-2) of std_logic_vector(31 downto 0);
        -- One expected-value counter per ToHost memory; retained between activations.
        variable cnt: slv32_array:=(others => (others => '0'));
        -- Reference word for the memory word currently being checked.
        variable compare_value: std_logic_vector(DATA_WIDTH-1 downto 0);
        -- Counter value that every memory must reach before ToHost is finished.
        constant FINAL_CNT_VAL: std_logic_vector(31 downto 0) := x"0000_8000";
        -- Sticky flag: set once all counters have reached FINAL_CNT_VAL.
        variable do_finish_tohost: boolean := false;
    begin
        -- Default; overridden below once the ToHost side has finished.
        switch_off_tohost <= '0';
        if DoCompare = '1' and do_finish_tohost = false then
            for i in 0 to 127 loop
                -- Build the reference word for the configured data width.
                if DATA_WIDTH = 1024 then
                    compare_value :=
                                     std_logic_vector(to_unsigned(ToHostMemorySelect_s,8))& -- @suppress "Incorrect array size in assignment: expected (<DATA_WIDTH>) but was (<256>)" -- @suppress "Incorrect array size in assignment: expected (<DATA_WIDTH>) but was (<512>)"
                                     x"11_1111"&cnt(ToHostMemorySelect_s)&
                                     x"2222_2222"&cnt(ToHostMemorySelect_s)&
                                     x"3333_3333"&cnt(ToHostMemorySelect_s)&
                                     x"4444_4444"&cnt(ToHostMemorySelect_s)&
                                     x"5555_5555"&cnt(ToHostMemorySelect_s)&
                                     x"6666_6666"&cnt(ToHostMemorySelect_s)&
                                     x"7777_7777"&cnt(ToHostMemorySelect_s)&
                                     x"8888_8888"&cnt(ToHostMemorySelect_s)&
                                     x"9999_9999"&cnt(ToHostMemorySelect_s)&
                                     x"aaaa_aaaa"&cnt(ToHostMemorySelect_s)&
                                     x"bbbb_bbbb"&cnt(ToHostMemorySelect_s)&
                                     x"cccc_cccc"&cnt(ToHostMemorySelect_s)&
                                     x"dddd_dddd"&cnt(ToHostMemorySelect_s)&
                                     x"eeee_eeee"&cnt(ToHostMemorySelect_s)&
                                     x"ffff_ffff"&cnt(ToHostMemorySelect_s)&
                                     x"0101_0101"&cnt(ToHostMemorySelect_s);
                elsif DATA_WIDTH = 512 then
                    compare_value :=
                                     std_logic_vector(to_unsigned(ToHostMemorySelect_s,8))& -- @suppress "Incorrect array size in assignment: expected (<DATA_WIDTH>) but was (<256>)" -- @suppress "Incorrect array size in assignment: expected (<DATA_WIDTH>) but was (<512>)"
                                     x"dd_dddd"&cnt(ToHostMemorySelect_s)&
                                     x"cccc_cccc"&cnt(ToHostMemorySelect_s)&
                                     x"bbbb_bbbb"&cnt(ToHostMemorySelect_s)&
                                     x"aaaa_aaaa"&cnt(ToHostMemorySelect_s)&
                                     x"9999_9999"&cnt(ToHostMemorySelect_s)&
                                     x"8888_8888"&cnt(ToHostMemorySelect_s)&
                                     x"7777_7777"&cnt(ToHostMemorySelect_s)&
                                     x"6666_6666"&cnt(ToHostMemorySelect_s);
                elsif DATA_WIDTH = 256 then
                    compare_value :=
                                     std_logic_vector(to_unsigned(ToHostMemorySelect_s,8))& -- @suppress "Incorrect array size in assignment: expected (<DATA_WIDTH>) but was (<256>)"
                                     x"dd_dddd"&cnt(ToHostMemorySelect_s)&
                                     x"cccc_cccc"&cnt(ToHostMemorySelect_s)&
                                     x"bbbb_bbbb"&cnt(ToHostMemorySelect_s)&
                                     x"aaaa_aaaa"&cnt(ToHostMemorySelect_s);
                else
                    error("Unsupported DATA_WIDTH: " & to_string(DATA_WIDTH) & " bits");
                end if;
                -- Plain VHDL assertion: reports memory number, word index, expected and actual value.
                assert ToHostMem(ToHostMemorySelect_s)(i) = compare_value report "Memory: "&to_string(ToHostMemorySelect_s)&" index: "&to_string(i)&" expected:"&
                to_hstring(compare_value) & " value: " & to_hstring(ToHostMem(ToHostMemorySelect_s)(i)) severity error;
                -- Same comparison through check_value, so a mismatch is raised as an ERROR alert.
                check_value(ToHostMem(ToHostMemorySelect_s)(i), compare_value,ERROR, "Check counter value in ToHost memory", C_SCOPE);
                cnt(ToHostMemorySelect_s) := cnt(ToHostMemorySelect_s) + 1;
            end loop;
            -- Assume finished, then withdraw if any memory has not yet reached the final count.
            do_finish_tohost := true;
            for i in 0 to NUMBER_OF_DESCRIPTORS-2 loop
                if cnt(i) < FINAL_CNT_VAL then
                    do_finish_tohost := false;
                end if;
            end loop;
        end if;
        if(do_finish_tohost) then
            switch_off_tohost <= '1';
        end if;
        if(do_finish_tohost and do_finish_fromhost) then
            report_alert_counters(FINAL); -- Report final counters and print conclusion for simulation (Success/Fail)
            log(ID_LOG_HDR, "SIMULATION COMPLETED", C_SCOPE);

            -- Finish the simulation: tready_proc calls std.env.stop when it sees user_lnk_up = '0'.
            user_lnk_up <= '0';
        end if;
    end process;



    -- tready_proc: drives s_axis_r_rq.tready from a fresh 10-bit random value on
    -- every rising edge of user_clk, and stops the simulation once user_lnk_up
    -- has been driven to '0'.
    tready_proc: process(user_clk)
        variable throttle_draw: std_logic_vector(9 downto 0);
    begin
        if rising_edge(user_clk) then
            -- A link-down indication is the request to end the simulation.
            if user_lnk_up = '0' then
                std.env.stop;
            end if;

            throttle_draw := random(10);

            -- Ready by default; withdrawn when the random draw is below 256.
            s_axis_r_rq.tready <= '1';
            if throttle_draw < 256 then
                s_axis_r_rq.tready <= '0';
            end if;
        end if;
    end process;

    -- reset is the logical inverse of sys_rst_n.
    reset <= not sys_rst_n;
    -- clk and user_clk are both driven directly from sysclk_gen.
    clk   <= sysclk_gen;
    user_clk <= sysclk_gen;


end sim;
